

# Debug run of process_pdf_enroll.py on a single scihub shard.
#
# Usage:   run from the directory that contains process_pdf_enroll.py
#          (the script path below is relative to the current directory).
# Input:   one JSONL shard on S3 (input_s3_file).
# Output:  one JSONL shard on S3 (output_s3_file).
# Logs:    stdout and stderr of the python process are both captured in
#          task-logs/debug.log, which is overwritten on every run.
#
# NOTE(review): the input shard suffix (018706) differs from the output shard
# suffix (264134) -- presumably intentional for this debug run; confirm.

# Abort on the first failing command and on use of an unset variable.
set -eu

input_s3_file='s3://llm-pdf-text-1/meta-index/scihub/v001/scihub/part-66210c190659-018706.jsonl'
output_s3_file='s3://llm-pdf-text-1/pdf_gpu_output/ebook_index_v4/scihub/v001/scihub/part-66210c190659-264134.jsonl'

log_dir='task-logs'
log_file="${log_dir}/debug.log"

# The redirection below is set up by the shell before python starts; without
# the directory it fails and python is never launched.
mkdir -p -- "${log_dir}"

# Expansions are quoted so the paths are always passed as single arguments.
python process_pdf_enroll.py \
  --input "${input_s3_file}" \
  --output "${output_s3_file}" \
  --formula \
  --ocr \
  > "${log_file}" 2>&1
